Load packages

Read in the data

all <- read_csv(here("data","sensors", "sensor-data_all.csv")) %>% 
  clean_names() %>%
  mutate(date_time=mdy_hms(date_time), #aapply lubridate to date/time column
         date=format(date_time, '%m/%d/%Y'), #create only date column
         time=format(date_time, '%H:%M:%S')) %>% #create only time column
 select(site, sensor_number, date_time, date, time, temp_c, p_h) %>%
    mutate(site=replace(site, site=="LOL", "Lompoc Landing"),
           site=replace(site, site=="ALG", "Alegria"),
           site=replace(site, site=="BML", "Bodega Bay")) #rename locations
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Site = col_character(),
##   `Sensor number` = col_double(),
##   `Download date` = col_character(),
##   `Calibration date` = col_character(),
##   `Temp_(C)` = col_double(),
##   `Voltage#1` = col_double(),
##   TK = col_double(),
##   `S(T)` = col_double(),
##   `Eo(T)` = col_double(),
##   pH = col_double(),
##   `Date time` = col_character()
## )

Plot up the pH

#set site order for plotting (legend)
all$site <- factor(all$site, levels=c("Alegria", "Lompoc Landing", "Bodega Bay"))


all_filter <- all %>%
  filter(p_h < 8.5) %>%
  mutate(removeit = ifelse(site=="Alegria" & date_time<ymd_hms("2021-06-30 23:00:00"), "remove", "keep")) %>%
  filter(removeit=="keep")

ggplot(all_filter, aes(x=date_time, y=p_h, group=site)) +
  geom_line(aes(color=site), size=0.7) +
  geom_point(aes(color=site), size=0.5) +
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  ylab("pH") +
  theme_bw() +
  theme(legend.title=element_blank(),
        axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
        axis.title.x=element_text(size=15),
        axis.text.y=element_text(size=12),
        axis.title.y=element_text(size=15),
        legend.text=element_text(siz=12))

ggsave(here("figures", "sensors", "june-sept.png"), height=20, width=40, units="cm")

Plot temp up for good measure

ggplot(all, aes(x=date_time, y=temp_c, group=site)) +
  geom_line(aes(color=site), size=0.7) +
  geom_point(aes(color=site), size=0.5) +
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90))

ggsave(here("figures", "sensors","june-sept_temp.png"), height=20, width=40, units="cm")

Plot each site, temp and pH

BML

bml <- all %>%
  filter(site=="Bodega Bay")

ggplot(bml, aes(x=date_time)) +
  geom_line(aes(y=p_h), color="red") + 
  geom_line(aes(y=temp_c), color="blue") + # Divide by 10 to get the same range than the temperature
  scale_y_continuous(name = "pH", #first axis name
    sec.axis = sec_axis(~., name="Temp (C)")) + #second axis name and features
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90))

## Try it with pH and temp sorted as "groups"
bml2 <- bml %>%
  pivot_longer(cols=temp_c:p_h,
               names_to = "group",
               values_to = "value")

ggplot(bml2, aes(x=date_time, y=value, group=group)) +
  geom_line(aes(color=group), size=0.7) +
  #geom_point(aes(color=group), size=0.5) +
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90))

Let’s come back to this…

Add tide cycles!

Customize HTML

Sites:

  • Bodega (Bodega%20Harbor%20entrance%2C%20California)
  • Lompoc (Point%20Arguello%2C%20California)
  • Alegria (Gaviota%2C%20California)

Glen: number of days to extract

Interval:

  • 10 minutes (00%3A10)
  • 15 minutes (00%3A15)
  • 1 hour (01%3A00)

BML

Scrape tide data from http://tbone.biol.sc.edu/tide/

bml_tide <- read_html("http://tide.arthroinfo.org/tideshow.cgi?tplotdir=horiz;gx=640;gy=240;caltype=ndp;type=mrare;interval=00%3A15;glen=83;units=feet;year=2021;month=05;day=31;hour=09;min=30;tzone=local;d_year=;d_month=01;d_day=01;d_hour=00;d_min=00;ampm24=24;site=Bodega%20Harbor%20entrance%2C%20California") %>%
  html_elements("pre") %>% #select only the date, time, and tide values from the webpage
  html_text2() %>% #convert list to data table
  data.frame() %>% #convert table to data frame
  mutate(date_tide = str_split(., pattern = "\n")) %>% #split into rows by each time point
  unnest(date_tide) %>% #unnest into two columns 
  mutate(date_tide=as.factor(date_tide)) %>% #make column values factors 
  separate(date_tide, into = c("date", "space", "time", "time_zone", "tide"), sep="\\s") %>% #separate the values (separated by spaces) into their own columns
  select(-"space", -".") %>% #remove the "space" (blank space) column and duplicated column created by unnest() 
  unite("time", "time", "time_zone", sep="\ ",) %>% #join together time and time zone
  unite("date_time", "date", "time", sep="\ ") %>% #join together date and time/time zone
  drop_na() %>% #remove final row with NA (not sure why that's even there)
  mutate(date_time=ymd_hm(date_time), #apply lubridate to date/time column
         tide=as.numeric(tide)) #coerce tide values from character to numeric
## Warning: Expected 5 pieces. Missing pieces filled with `NA` in 1 rows [7969].

Plot it up

ggplot(bml_tide, aes(x=date_time, y=tide)) +
  geom_line(size=0.7) +
  scale_x_datetime(breaks = scales::date_breaks("2 days"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  ylab("Tide height") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90))

ggsave(here("figures", "sensors", "bml_tide.png"), height=20, width=40, units="cm")

Combine the tides with the pH/temp values for BML, plot it up!

bml_all <- full_join(bml, bml_tide)
## Joining, by = "date_time"
bml3 <- bml_all %>%
  drop_na(c(tide, p_h)) %>% #whoops, started off collecting data every 10 minutes and then switched to 15 minutes (so account for that by removing pH and tide values that don't overlap)
  pivot_longer(cols=temp_c:tide,
               names_to = "data",
               values_to = "value")
  
ggplot(bml3, aes(x=date_time, y=value, group=data)) +
  geom_line(aes(color=data), size=0.7) +
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90))

ggsave(here("figures", "sensors", "bml_pH_temp_tide.png"), height=20, width=40, units="cm")

Remove measurements where the sensor was out of the water (at a tide lower than -0.5, possibly higher (TBD))

bml_detide <- bml_all %>%
  filter(tide>-0.5) %>%
  drop_na(c(p_h)) #drop observations that don't overlap (10 min vs 15 min sampling interval)

ggplot(bml_detide, aes(x=date_time)) +
  geom_line(aes(y=tide), color="red") + 
  geom_line(aes(y=temp_c), color="blue") + # Divide by 10 to get the same range than the temperature
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90))

Check the tide height filtering for BML

bml_superfilter <- bml_all %>%
  filter(date_time > ymd_hms("2021-07-19 23:00:00"),
         date_time < ymd_hms("2021-07-26 23:00:00"))  #7-30 to 8-06 is also interesting

#Looking at the mid July dates, looks like I might need to filter out tides below -0.2? But then if I look at other dates, it's not as bad. -0.4 might actually be a good decision.

y1 <- bml_superfilter$p_h
y2 <- bml_superfilter$temp_c
y3 <- bml_superfilter$tide
x <- bml_superfilter$date_time

highchart() %>% 
  hc_add_series(data = y3, dashStyle="solid") %>% 
  hc_add_series(data = y2, yAxis = 1) %>% 
  hc_yAxis_multiples(
     list(lineWidth = 3, lineColor="#009E73", title=list(text="Tide cycle")),
     list(lineWidth = 3, lineColor="#0072B2", title=list(text="Temperature"))) %>%
    hc_xAxis(title = "Date", categories = x, breaks=10) %>%
  hc_colors(c("#009E73","#0072B2"))

Let’s filter more, now based on anomalous temperature measurements (spikes in July)

bml_detide_temp <- bml_detide %>%
  filter(!(date_time >= ymd_hms("2021-07-10 06:00:00") & date_time <= ymd_hms("2021-07-10 07:30:00")),
         !(date_time >= ymd_hms("2021-07-21 06:15:00") & date_time <= ymd_hms("2021-07-21 07:15:00")),
         !(date_time >= ymd_hms("2021-07-22 06:15:00") & date_time <= ymd_hms("2021-07-22 08:00:00")),
         !(date_time >= ymd_hms("2021-07-23 07:15:00") & date_time <= ymd_hms("2021-07-23 08:30:00")))

ggplot(bml_detide_temp, aes(x=date_time)) +
  geom_line(aes(y=temp_c), color="red") + 
  geom_line(aes(y=tide), color="blue") + # Divide by 10 to get the same range than the temperature
  scale_x_datetime(breaks = scales::date_breaks("1 day"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90))

Filter even more, based on pH measurements, and generate final plot

# bml_detide_temp_ph <- bml_detide_temp %>%
#   arrange(date_time) %>% #make sure observations are in order by date/time
#   mutate(diff3 = p_h - lag(p_h, default = first(p_h))) %>% #find difference between two subsequent pH measurements to identify anomalies
#   filter(diff3>-0.5,
#          diff3<0.5) #remove weird readings on 7/10
# 
# ggplot(bml_detide_temp_ph, aes(x=date_time)) +
#   geom_line(aes(y=p_h), color="#009E73") + 
#   geom_line(aes(y=temp_c), color="#D55E00") + # Divide by 10 to get the same range than the temperature
#   geom_line(aes(y=tide), color="#0072B2") + # Divide by 10 to get the same range than the temperature
#   scale_x_datetime(breaks = scales::date_breaks("1 week"), 
#                     labels = date_format("%m/%d %H:%m")) +
#   annotate(geom="text", x=as.POSIXct("2021-05-31 00:10:00"), y=14, hjust=0, label="Temp (C)", color="#D55E00", size=5) +
#   annotate(geom="text", x=as.POSIXct("2021-08-16 00:10:00"), y=9, hjust=-0.1, label="pH", color="#009E73", size=5) +
#   annotate(geom="text", x=as.POSIXct("2021-05-31 00:10:00"), y=6, hjust=0, label="Tide", color="#0072B2", size=5) +
#   xlab("Date & time") +
#   ylab("Value") +
#   theme_bw() +
#   theme(axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
#         axis.title.x=element_text(size=15),
#         axis.text.y=element_text(size=12),
#         axis.title.y=element_text(size=15))
# 
# ggsave(here("figures", "sensors", "bml_detide_tide-temp-pH.png"), height=20, width=40, units="cm")

LOL

Scrape tide data from http://tbone.biol.sc.edu/tide/

lol_tide <- read_html("http:///tide.arthroinfo.org/tideshow.cgi?tplotdir=horiz;gx=640;gy=240;caltype=ndp;type=mrare;interval=00%3A15;glen=94;units=feet;year=2021;month=06;day=14;hour=08;min=00;tzone=local;d_year=;d_month=01;d_day=01;d_hour=00;d_min=00;ampm24=24;site=Point%20Arguello%2C%20California") %>%
  html_elements("pre") %>% #select only the date, time, and tide values from the webpage
  html_text2() %>% #convert list to data table
  data.frame() %>% #convert table to data frame
  mutate(date_tide = str_split(., pattern = "\n")) %>% #split into rows by each time point
  unnest(date_tide) %>% #unnest into two columns 
  mutate(date_tide=as.factor(date_tide)) %>% #make column values factors 
  separate(date_tide, into = c("date", "space", "time", "time_zone", "tide"), sep="\\s") %>% #separate the values (separated by spaces) into their own columns
  select(-"space", -".") %>% #remove the "space" (blank space) column and duplicated column created by unnest() 
  unite("time", "time", "time_zone", sep="\ ",) %>% #join together time and time zone
  unite("date_time", "date", "time", sep="\ ") %>% #join together date and time/time zone
  drop_na() %>% #remove final row with NA (not sure why that's even there)
  mutate(date_time=ymd_hm(date_time), #apply lubridate to date/time column
         tide=as.numeric(tide)) #coerce tide values from character to numeric
## Warning: Expected 5 pieces. Missing pieces filled with `NA` in 1 rows [9025].

Plot it up

ggplot(lol_tide, aes(x=date_time, y=tide)) +
  geom_line(size=0.7) +
  scale_x_datetime(breaks = scales::date_breaks("2 days"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  ylab("Tide height") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90))

ggsave(here("figures", "sensors", "lol_tide.png"), height=20, width=40, units="cm")

Combine the tides with the pH/temp values for LOL, plot it up!

# filter all data for LOL
lol <- all %>%
  filter(site=="Lompoc Landing")

lol_all <- full_join(lol, lol_tide)
## Joining, by = "date_time"
lol_plot <- lol_all %>%
  pivot_longer(cols=temp_c:tide,
               names_to = "data",
               values_to = "value")
  
ggplot(lol_plot, aes(x=date_time, y=value, group=data)) +
  geom_line(aes(color=data), size=0.7) +
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90))
## Warning: Removed 42 row(s) containing missing values (geom_path).

ggsave(here("figures", "sensors", "lol_pH_temp_tide.png"), height=20, width=40, units="cm")
## Warning: Removed 42 row(s) containing missing values (geom_path).

Remove measurements where the sensor pool was disconnected from the ocean (at a tide lower than +0.5, likely higher (TBD))

lol_detide <- lol_all %>%
  filter(tide>0.5)

ggplot(lol_detide, aes(x=date_time)) +
  geom_line(aes(y=tide), color="red") + 
  geom_line(aes(y=temp_c), color="blue") +
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90))
## Warning: Removed 18 row(s) containing missing values (geom_path).

Check the peaks (the peak is where the tide starts coming back in - when the pool is re-connected to the ocean and the temp drops), and ID where they are so I can filter

lol_check <- lol_detide %>%
  filter(date_time < ymd_hms("2021-06-30 23:00:00")) %>%
  arrange(date_time) %>% #make sure observations are in order by date/time
  mutate(diff = temp_c - lag(temp_c, default = first(temp_c))) %>% #find difference between two subsequent temp measurements to identify anomalies
  mutate(diff2 = temp_c - lead(temp_c, default = first(temp_c))) #find difference between two subsequent temp measurements to identify anomalies
  #filter(diff < 1 | diff > -1 | diff2 < 1 | diff2 > -1)

#Thank you stas g (https://stats.stackexchange.com/a/164830) for this function
find_peaks <- function (x, m = 3){
    shape <- diff(sign(diff(x, na.pad = FALSE)))
    pks <- sapply(which(shape < 0), FUN = function(i){
       z <- i - m + 1
       z <- ifelse(z > 0, z, 1)
       w <- i + m + 1
       w <- ifelse(w < length(x), w, length(x))
       if(all(x[c(z : i, (i + 2) : w)] <= x[i + 1])) return(i + 1) else return(numeric(0))
    })
     pks <- unlist(pks)
     pks
}

pk <- find_peaks(lol_check$temp_c, m = 50) #set a high threshold bc we need it...

lol_check_peak <- lol_detide %>%
  filter(date_time < ymd_hms("2021-06-30 23:00:00")) %>%
  arrange(date_time) %>% #make sure observations are in order by date/time
  mutate(diff = temp_c - lag(temp_c, default = first(temp_c))) %>% #find difference between two subsequent temp measurements to identify anomalies
  mutate(diff2 = temp_c - lead(temp_c, default = first(temp_c))) %>% #find difference between two subsequent temp measurements to identify anomalies
  #filter(diff < 1 | diff > -1 | diff2 < 1 | diff2 > -1)
  mutate(peak = ifelse(row_number() %in% c(pk) == TRUE, 1, 0)) #if a row ID matches the row ID found by find_peaks, then call it "1" (if not, "0")

ggplot(lol_check_peak, aes(x=date_time)) +
  geom_line(aes(y=tide), color="red") + 
  geom_line(aes(y=temp_c), color="blue") +
  #geom_line(aes(y=diff), color="green") +
  #geom_line(aes(y=diff2), color="orange") +
  geom_point(aes(y=temp_c, color=ifelse(peak>0, "red", "black"), size=ifelse(peak>0, 2, 0))) + #if it's a peak, color it red and make it big
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90)) +
  scale_color_identity() +
  scale_size_identity() +
  geom_vline(aes(xintercept = date_time), lol_check_peak %>% filter(peak == 1)) + #if it's a peak, draw a vertical line
  scale_y_continuous(breaks = round(seq(min(lol_check_peak$tide), max(lol_check_peak$tide), by = 0.5),1))

Looks like anything below a 1.5 tide height is definitely disconnected from the ocean

So now, let’s filter based on anomalous pH measurements

lol_detide_temp_ph <- lol_all %>%
  filter(tide>1.5) %>%
  arrange(date_time) %>% #make sure observations are in order by date/time
  mutate(diff3 = p_h - lag(p_h, default = first(p_h))) #doesn't look like pH values jump extraordinarily

ggplot(lol_detide_temp_ph, aes(x=date_time)) +
  geom_line(aes(y=p_h), color="blue") +
  geom_line(aes(y=diff3), color="green") +
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90))
## Warning: Removed 13 row(s) containing missing values (geom_path).

## Warning: Removed 13 row(s) containing missing values (geom_path).

And plot it all up

ggplot(lol_detide_temp_ph, aes(x=date_time)) +
  geom_line(aes(y=p_h), color="#009E73") + 
  geom_line(aes(y=temp_c), color="#D55E00") + # Divide by 10 to get the same range than the temperature
  geom_line(aes(y=tide), color="#0072B2") + # Divide by 10 to get the same range than the temperature
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  annotate(geom="text", x=as.POSIXct("2021-8-12 00:01:00"), y=19, hjust=0, label="Temp (C)", color="#D55E00", size=5) +
  annotate(geom="text", x=as.POSIXct("2021-6-14 00:01:00"), y=9, hjust=-0.1, label="pH", color="#009E73", size=5) +
  annotate(geom="text", x=as.POSIXct("2021-6-14 00:01:00"), y=6, hjust=0, label="Tide", color="#0072B2", size=5) +
  xlab("Date & time") +
  ylab("Value") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
        axis.title.x=element_text(size=15),
        axis.text.y=element_text(size=12),
        axis.title.y=element_text(size=15))
## Warning: Removed 13 row(s) containing missing values (geom_path).

## Warning: Removed 13 row(s) containing missing values (geom_path).

ggsave(here("figures", "sensors", "lol_detide_tide-temp.png"), height=20, width=40, units="cm")
## Warning: Removed 13 row(s) containing missing values (geom_path).

## Warning: Removed 13 row(s) containing missing values (geom_path).

Plot up these data using a format similar to Li’s for the LTER data - welcome, highchart!

y1 <- lol_detide_temp_ph$p_h
y2 <- lol_detide_temp_ph$temp_c
y3 <- lol_detide_temp_ph$tide
x <- lol_detide_temp_ph$date_time

plot <- highchart() %>% 
  hc_add_series(data = y1, dashStyle="solid") %>% 
  hc_add_series(data = y2, yAxis = 1) %>% 
  hc_add_series(data = y3, yAxis = 2) %>%
  hc_yAxis_multiples(
     list(lineWidth = 3, lineColor='#D55E00', title=list(text="pH")),
     list(lineWidth = 3, lineColor="#009E73", title=list(text="Temperature")),
     list(lineWidth = 3, lineColor="#0072B2", title=list(text="Tide cycle"))) %>%
    hc_xAxis(title = "Date", categories = x, breaks=10) %>%
  hc_colors(c("#D55E00","#009E73", "#0072B2"))

htmlwidgets::saveWidget(widget = plot, file = here("figures", "sensors", "LOL_highchart.html"))
webshot::webshot(url = here("figures", "sensors", "LOL_highchart.html"), file = here("figures", "sensors", "LOL_highchart.png"), delay=3)

Well that was a bit of a waste of time

Let’s move on to continuing to process the data for “out of water”

lol_superfilter <- lol_all %>%
  filter(tide>3.232) %>%
  filter(date_time > ymd_hms("2021-07-26 23:00:00"),
         date_time < ymd_hms("2021-08-02 23:00:00"))

#3.29 is also a solid candidate also for tide filtering

y1 <- lol_superfilter$p_h
y2 <- lol_superfilter$temp_c
y3 <- lol_superfilter$tide
x <- lol_superfilter$date_time

highchart() %>% 
  hc_add_series(data = y1, dashStyle="solid") %>% 
  hc_add_series(data = y2, yAxis = 1) %>% 
  hc_add_series(data = y3, yAxis = 1) %>%
  hc_yAxis_multiples(
     list(lineWidth = 3, lineColor='#D55E00', title=list(text="pH")),
     list(lineWidth = 3, lineColor="#009E73", title=list(text="Temp")),
     list(lineWidth = 3, lineColor="#0072B2", title=list(text="Tide cycle"))) %>%
    hc_xAxis(title = "Date", categories = x, breaks=10) %>%
  hc_colors(c("#D55E00",
              "#009E73",
              "#0072B2"))

Nevermind it wasn’t a waste of time! Highcharts are super useful! Looks like the pool is definitely disconnected from the ocean at a tide lower than a +3.0…maybe a +3.02 (but possibly even +3.2 or +3.4)

Create df for LOL with values above tides of +3.232, and remove (or smooth?) anomalous time periods

lol_detided <- lol_detide_temp_ph %>%
  filter(tide>3.232) %>%
  filter(!(date_time >= ymd_hms("2021-07-31 13:30:00") & date_time <= ymd_hms("2021-07-31 14:00:00")),
         !(date_time >= ymd_hms("2021-08-01 13:45:00") & date_time < ymd_hms("2021-08-01 15:00:00"))) %>%
  drop_na() #remove times with tides but without pH values

ALG

Scrape tide data from http://tbone.biol.sc.edu/tide/

alg_tide <- read_html("http://tide.arthroinfo.org/tideshow.cgi?tplotdir=horiz;gx=640;gy=240;caltype=ndp;type=mrare;interval=00%3A15;glen=98;units=feet;year=2021;month=06;day=12;hour=01;min=00;tzone=local;d_year=;d_month=01;d_day=01;d_hour=00;d_min=00;ampm24=24;site=Gaviota%2C%20California") %>%
  html_elements("pre") %>% #select only the date, time, and tide values from the webpage
  html_text2() %>% #convert list to data table
  data.frame() %>% #convert table to data frame
  mutate(date_tide = str_split(., pattern = "\n")) %>% #split into rows by each time point
  unnest(date_tide) %>% #unnest into two columns 
  mutate(date_tide=as.factor(date_tide)) %>% #make column values factors 
  separate(date_tide, into = c("date", "space", "time", "time_zone", "tide"), sep="\\s") %>% #separate the values (separated by spaces) into their own columns
  select(-"space", -".") %>% #remove the "space" (blank space) column and duplicated column created by unnest() 
  unite("time", "time", "time_zone", sep="\ ",) %>% #join together time and time zone
  unite("date_time", "date", "time", sep="\ ") %>% #join together date and time/time zone
  drop_na() %>% #remove final row with NA (not sure why that's even there)
  mutate(date_time=ymd_hm(date_time), #apply lubridate to date/time column
         tide=as.numeric(tide)) #coerce tide values from character to numeric
## Warning: Expected 5 pieces. Missing pieces filled with `NA` in 1 rows [9409].

Plot it up

ggplot(alg_tide, aes(x=date_time, y=tide)) +
  geom_line(size=0.7) +
  scale_x_datetime(breaks = scales::date_breaks("2 days"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  ylab("Tide height") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90))

ggsave(here("figures", "sensors", "alg_tide.png"), height=20, width=40, units="cm")

Combine the tides with the pH/temp values for ALG, plot it up!

# filter all data for ALG
alg <- all %>%
  filter(site=="Alegria")

alg_all <- full_join(alg, alg_tide)
## Joining, by = "date_time"
alg_plot <- alg_all %>%
  pivot_longer(cols=temp_c:tide,
               names_to = "data",
               values_to = "value")
  
ggplot(alg_plot, aes(x=date_time, y=value, group=data)) +
  geom_line(aes(color=data), size=0.7) +
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90))
## Warning: Removed 48 row(s) containing missing values (geom_path).

ggsave(here("figures", "sensors", "alg_pH_temp_tide.png"), height=20, width=40, units="cm")
## Warning: Removed 48 row(s) containing missing values (geom_path).

Remove anomalous measurements (like where the sensor was likely inundated with sand!)

alg_desand <- alg_all %>%
  filter(date_time > ymd_hms("2021-06-18 01:00:00")) %>% #super weird, but pH looks like it stabilizes at 1AM post sand
  drop_na() %>% #remove rows with only tide values
  distinct(date_time, .keep_all = TRUE) #remove duplicated data (not sure where those came from)

ggplot(alg_desand, aes(x=date_time, y=p_h)) +
  geom_line(size=0.7) +
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=90))

Check anomalous points and filter by tide height given pH patterns

alg_highchart <- alg_desand %>%
  filter(date_time >= ymd_hms("2021-07-17 01:00:00") & date_time <= ymd_hms("2021-07-27 01:00:00")) %>%
  filter(tide>0.9)

y1 <- alg_highchart$p_h
y2 <- alg_highchart$temp_c
y3 <- alg_highchart$tide
x <- alg_highchart$date_time

highchart() %>% 
  hc_add_series(data = y1, dashStyle="solid") %>% 
  hc_add_series(data = y3, yAxis = 1) %>% 
  #hc_add_series(data = y3, yAxis = 2) %>%
  hc_yAxis_multiples(
     list(lineWidth = 3, lineColor='#D55E00', title=list(text="pH")),
     #list(lineWidth = 3, lineColor="#009E73", title=list(text="Temp")),
     list(lineWidth = 3, lineColor="#0072B2", title=list(text="Tide cycle"))) %>%
    hc_xAxis(title = "Date", categories = x) %>%
  hc_colors(c("#D55E00",
              "#009E73", 
              "#0072B2"))

These points look fine (and filtering should be easy since the sensor is always connected to the ocean) - but the high chart is suggesting a pattern around 0.00 tide (so let’s try removing everything below 0.00… actually, let’s try 0.6 since some of those pH values are quite low and consistently dropping at low tide it seems)

Tried 0.00, but still seeing steep drop in pH with tide cycle Tried 0.60, but still seeing steep drop in pH Tried 0.80, but still seeing some drop in pH 0.90 appears to be OK at not creating massive drop in pH but still having some reasonable cycle

Detide ALG data and plot

alg_detide <- alg_desand %>%
  filter(tide>0.9)

ggplot(alg_detide, aes(x=date_time)) +
  geom_line(aes(y=p_h), color="#009E73") + 
  geom_line(aes(y=temp_c), color="#D55E00") + # Divide by 10 to get the same range than the temperature
  geom_line(aes(y=tide), color="#0072B2") + # Divide by 10 to get the same range than the temperature
  scale_x_datetime(breaks = scales::date_breaks("4 days"), 
                    labels = date_format("%m/%d %H:%m")) +
  annotate(geom="text", x=as.POSIXct("2021-6-18 00:01:00"), y=19, hjust=0, label="Temp (C)", color="#D55E00", size=5) +
  annotate(geom="text", x=as.POSIXct("2021-6-18 00:01:00"), y=10, hjust=-0.1, label="pH", color="#009E73", size=5) +
  annotate(geom="text", x=as.POSIXct("2021-6-18 00:01:00"), y=-1.5, hjust=0, label="Tide", color="#0072B2", size=5) +
  xlab("Date & time") +
  ylab("Value") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
        axis.title.x=element_text(size=15),
        axis.text.y=element_text(size=12),
        axis.title.y=element_text(size=15))

ggsave(here("figures", "sensors", "alg_detide.png"), height=20, width=40, units="cm")

There’s still some noise at a few dates, let’s filter those dates out of the data ### Remove (or smooth?) time periods with a LOT of noise

alg_no_noise <- alg_detide %>%
  filter(!(date_time > ymd_hms("2021-07-17 11:00:00") & date_time < ymd_hms("2021-07-17 19:30:00"))) %>% #this is a chunk of time with a lot of noise
  filter(!(date_time %in% (ymd_hms("2021-06-19 10:15:00", #remove time points with significant jumps in the wrong direction
                                  "2021-06-29 14:15:00",
                                  "2021-06-29 15:30:00",
                                  "2021-06-30 11:45:00",
                                  "2021-06-30 14:15:00",
                                  "2021-06-30 16:15:00",
                                  "2021-06-30 19:15:00",
                                  "2021-07-02 06:15:00",
                                  "2021-07-02 07:45:00",
                                  "2021-07-02 10:45:00",
                                  "2021-07-04 09:45:00",
                                  "2021-07-04 12:00:00",
                                  "2021-07-04 15:15:00"))))

y1 <- alg_no_noise$p_h
y3 <- alg_no_noise$tide
x <- alg_no_noise$date_time

highchart() %>% 
  hc_add_series(data = y1, dashStyle="solid") %>% 
  hc_add_series(data = y3, yAxis = 1) %>% 
  hc_yAxis_multiples(
     list(lineWidth = 3, lineColor='#D55E00', title=list(text="pH")),
     list(lineWidth = 3, lineColor="#0072B2", title=list(text="Tide cycle"))) %>%
    hc_xAxis(title = "Date", categories = x) %>%
  hc_colors(c("#D55E00", 
              "#0072B2"))

Now let’s actually plot up a comparison of the pH values across sites

#join all sites into one df (and remove diff columns)
ph_all <- merge_recurse(list(alg_no_noise, lol_detided, bml_detide_temp)) %>%
  select(-diff3)

#set order for sites
ph_all$site <- factor(ph_all$site, levels=c("Lompoc Landing","Alegria","Bodega Bay"))

#set custom color for sites
pal <- c(
  "Alegria" = "#D55E00",
  "Lompoc Landing" = "#009E73", 
  "Bodega Bay" = "#0072B2"
)

ggplot(ph_all, aes(x=date_time, y=p_h, group=site)) +
  geom_line(aes(color=site), size=0.7, alpha=0.8) +
  scale_color_manual(values = pal) + #color lines by custom site color palette
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  ylab("pH") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
        axis.title.x=element_text(size=15),
        axis.text.y=element_text(size=12),
        axis.title.y=element_text(size=15),
        legend.position = "none")

ggsave(here("figures", "sensors", "ph_all.png"), height=20, width=40, units="cm")

# For presentations, highlight one line at a time per site
#BML focal
ph_all$site <- factor(ph_all$site, levels=c("Lompoc Landing","Alegria","Bodega Bay"))

ggplot(ph_all, aes(x=date_time, y=p_h, group=site)) +
  geom_line(aes(color=site, alpha=site), size=0.7) +
  scale_color_manual(values = pal) + #color lines by custom site color palette
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  ylab("pH") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
        axis.title.x=element_text(size=15),
        axis.text.y=element_text(size=12),
        axis.title.y=element_text(size=15),
        legend.position = "none") +
  scale_alpha_manual(values=c(0.5,0.5,1))

ggsave(here("figures", "sensors", "ph_bml.png"), height=20, width=40, units="cm")

#LOL focal
ph_all$site <- factor(ph_all$site, levels=c("Alegria","Bodega Bay", "Lompoc Landing"))

ggplot(ph_all, aes(x=date_time, y=p_h, group=site)) +
  geom_line(aes(color=site, alpha=site), size=0.7) +
  scale_color_manual(values = pal) + #color lines by custom site color palette
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  ylab("pH") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
        axis.title.x=element_text(size=15),
        axis.text.y=element_text(size=12),
        axis.title.y=element_text(size=15),
        legend.position = "none") +
  scale_alpha_manual(values=c(0.5,0.5,1))

ggsave(here("figures", "sensors", "ph_lol.png"), height=20, width=40, units="cm")

#ALG focal
ph_all$site <- factor(ph_all$site, levels=c("Lompoc Landing","Bodega Bay", "Alegria"))

ggplot(ph_all, aes(x=date_time, y=p_h, group=site)) +
  geom_line(aes(color=site, alpha=site), size=0.7) +
  scale_color_manual(values = pal) + #color lines by custom site color palette
  scale_x_datetime(breaks = scales::date_breaks("1 week"), 
                    labels = date_format("%m/%d %H:%m")) +
  xlab("Date time") +
  ylab("pH") +
  theme_bw() +
  theme(axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
        axis.title.x=element_text(size=15),
        axis.text.y=element_text(size=12),
        axis.title.y=element_text(size=15),
        legend.position = "none") +
  scale_alpha_manual(values=c(0.5,0.5,1))

ggsave(here("figures", "sensors", "ph_alg.png"), height=20, width=40, units="cm")

Create a nice table

# Calculate min, max, average pH values for each site
alg_min <- (min(alg_no_noise$p_h))
alg_max <- (max(alg_no_noise$p_h))
alg_avg <- (median(alg_no_noise$p_h))

lol_min <- (min(lol_detided$p_h))
lol_max <- (max(lol_detided$p_h))
lol_avg <- (median(lol_detided$p_h))

bml_min <- (min(bml_detide_temp$p_h))
bml_max <- (max(bml_detide_temp$p_h))
bml_avg <- (median(bml_detide_temp$p_h))

ph_table <- tribble(
  ~"Site", ~"Min pH", ~"Max pH", ~"Median pH",
  "Bodega Marine Lab", bml_min, bml_max, bml_avg,
  "Lompoc Landing", lol_min, lol_max, lol_avg,
  "Alegria", alg_min, alg_max, alg_avg) 

ph_table %>%
  gt() %>%
  fmt_number(
  columns = c("Min pH":"Median pH"),
  rows = everything(),
  decimals = 2) %>%
  data_color(
    columns = c("Site"),
    colors = scales::col_factor( # <- bc it's a factor
      palette = c("#D55E00","#0072B2","#009E73"),
      domain = c("Bodega Marine Lab", "Lompoc Landing", "Alegria"))) %>%
  gtsave(here("figures", "sensors", "ph_table.png"))

Summary (what I did)

At BML, sampling interval started at every 10 minutes and then switched to 15 minutes for all sites

Filtering for tide heights based on temp/pH patterns - All points at ALG were removed at tide heights below 0.9 - All points at LOL were removed at tide heights below 3.232 - All points at BML were removed at tide heights below -0.5

Removed anomalous time points (noise or major jumps, especially in wrong direction) - 20 from BML - All data collected before 06/18/2021 at ALG, plus 46 extra points - 8 from LOL